package com.csw.spark

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Demo of the RDD `sample` transformation.
  *
  * Reads a text file into an RDD of lines, draws a random sample of those
  * lines and prints every sampled line.
  */
object Demo07Sample {

  /**
    * Program entry point.
    *
    * Builds a SparkContext with master "local", samples the lines of
    * `spark/data/students.txt` and prints the sampled lines.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf()
      .setMaster("local")
      .setAppName("sample")

    val sc: SparkContext = new SparkContext(conf)

    // Stop the context even when the job fails, so its resources are
    // always released instead of lingering until the JVM exits.
    try {
      val lines: RDD[String] = sc.textFile("spark/data/students.txt")

      /**
        * sample: draw a random sample of the RDD.
        *
        * No seed is passed, so the selected lines differ between runs.
        * `fraction` is a sampling rate, not an exact count: the number of
        * returned lines is only approximately fraction * total.
        */
      val sampled: RDD[String] = lines.sample(withReplacement = true, fraction = 0.01)

      sampled.foreach(println)
    } finally {
      sc.stop()
    }
  }
}
